Data for Lab

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.4     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
# Download the global confirmed-case table and reshape it from one column per
# date into long format: every non-location column name goes into Date and its
# value into Confirmed.
time_series_confirmed_long <-
  read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv")) %>%
  rename(
    Province_State = `Province/State`,
    Country_Region = `Country/Region`
  ) %>%
  pivot_longer(
    cols = -c(Province_State, Country_Region, Lat, Long),
    names_to = "Date",
    values_to = "Confirmed"
  )
## 
## -- Column specification --------------------------------------------------------
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## i Use `spec()` for the full column specifications.
# Same download-and-reshape step for the global deaths table: the location
# columns are kept, every other column name becomes a Date entry and its value
# becomes Deaths.
time_series_deaths_long <-
  read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv")) %>%
  rename(
    Province_State = `Province/State`,
    Country_Region = `Country/Region`
  ) %>%
  pivot_longer(
    cols = -c(Province_State, Country_Region, Lat, Long),
    names_to = "Date",
    values_to = "Deaths"
  )
## 
## -- Column specification --------------------------------------------------------
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## i Use `spec()` for the full column specifications.
# Build a shared join key from province, country and date.
# The deaths table is cut down to the key plus its count, so the join does not
# bring in a second copy of the location columns.
time_series_deaths_long <- time_series_deaths_long %>%
  unite(col = "Key", Province_State, Country_Region, Date, sep = ".") %>%
  select(Key, Deaths)
# remove = FALSE keeps the original columns next to the new key
time_series_confirmed_long <- time_series_confirmed_long %>%
  unite(
    col = "Key", Province_State, Country_Region, Date,
    sep = ".", remove = FALSE
  )
# Combine both tables on the key, drop the helper column, and parse the
# month/day/year strings into Date values
time_series_long_joined <- time_series_confirmed_long %>%
  full_join(time_series_deaths_long, by = "Key") %>%
  select(-Key) %>%
  mutate(Date = mdy(Date))
# Report table: stack the count columns into Report_Type / Counts pairs
time_series_long_joined_counts <- time_series_long_joined %>%
  pivot_longer(
    cols = -c(Province_State, Country_Region, Lat, Long, Date),
    names_to = "Report_Type",
    values_to = "Counts"
  )

### Graphic Output
# Write the US deaths time series to a PDF file.
# Create the output folder first: pdf() cannot open a file in a directory
# that does not exist.
dir.create("Images", showWarnings = FALSE)
pdf("Images/time_series_example_plot.pdf", width = 6, height = 3)
# print() is explicit so the plot is drawn on the PDF device even when this
# script is run through source(), where ggplot objects are not auto-printed.
print(
  time_series_long_joined %>%
    # Filter first so only the US rows are aggregated
    filter(Country_Region == "US") %>%
    group_by(Country_Region, Date) %>%
    summarise(across(c(Confirmed, Deaths), sum), .groups = "drop") %>%
    ggplot(aes(x = Date, y = Deaths)) +
    geom_point() +
    geom_line() +
    ggtitle("US COVID-19 Deaths")
)
# Close the device so the file is flushed to disk
dev.off()
## png 
##   2
# Plot the US deaths time series to a PNG output file.
# The pixel dimensions are the size in inches multiplied by the resolution.
ppi <- 300
# Create the output folder first: png() cannot open a file in a directory
# that does not exist.
dir.create("Images", showWarnings = FALSE)
png("Images/time_series_example_plot.png",
  width = 6 * ppi, height = 6 * ppi, res = ppi
)
# print() is explicit so the plot is drawn on the PNG device even when this
# script is run through source(), where ggplot objects are not auto-printed.
print(
  time_series_long_joined %>%
    # Filter first so only the US rows are aggregated
    filter(Country_Region == "US") %>%
    group_by(Country_Region, Date) %>%
    summarise(across(c(Confirmed, Deaths), sum), .groups = "drop") %>%
    ggplot(aes(x = Date, y = Deaths)) +
    geom_point() +
    geom_line() +
    ggtitle("US COVID-19 Deaths")
)
# Close the device so the file is flushed to disk
dev.off()
## png 
##   2

US COVID-19 Deaths

### Interactive Graphs

# Version 2
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Interactive version of the US deaths plot, built in a single expression:
# aggregate per country and date, keep the US rows, draw the ggplot and hand
# it to ggplotly().
ggplotly(
  time_series_long_joined %>%
    group_by(Country_Region, Date) %>%
    summarise_at(vars(Confirmed, Deaths), sum) %>%
    filter(Country_Region == "US") %>%
    ggplot(mapping = aes(Date, Deaths)) +
    geom_point() +
    geom_line() +
    ggtitle("US COVID-19 Deaths")
)
library(plotly)
# Aggregate the joined table to one row per country and date, then keep the
# US rows only
US_deaths <- time_series_long_joined %>%
  group_by(Country_Region, Date) %>%
  summarise_at(vars(Confirmed, Deaths), sum) %>%
  filter(Country_Region == "US")
# Collect the layers for a graph of the US time series data for COVID deaths
p <- ggplot(US_deaths, aes(Date, Deaths)) +
  geom_point() +
  geom_line() +
  ggtitle("US COVID-19 Deaths")
# Render the stored plot object interactively with ggplotly
ggplotly(p)

### Creating animation

library(gganimate)
library(transformr)
# Use the black-and-white theme as the default for plots drawn from here on
theme_set(theme_bw())

# Per-country, per-date totals, restricted to the five countries to animate
data_time <- time_series_long_joined %>%
  group_by(Country_Region, Date) %>%
  summarise_at(vars(Confirmed, Deaths), sum) %>%
  filter(
    Country_Region %in% c("China", "Korea, South", "Japan", "Italy", "US")
  )
# Confirmed cases over time, one colour per country. The second point layer
# puts every row in its own group (group = seq_along(Date)), and
# transition_reveal() animates the plot along Date.
p <- ggplot(data_time, aes(Date, Confirmed, color = Country_Region)) +
  geom_point() +
  geom_line() +
  ggtitle("Confirmed COVID-19 Cases") +
  geom_point(aes(group = seq_along(Date))) +
  transition_reveal(Date)
# Some people needed to use this line instead
# animate(p,renderer = gifski_renderer(), end_pause = 15)
animate(p, end_pause = 15)

# Rebuild the five-country table and animated plot, then save the animation
# to a GIF file instead of displaying it.
data_time <- time_series_long_joined %>%
  group_by(Country_Region, Date) %>%
  summarise_at(vars(Confirmed, Deaths), sum) %>%
  filter(
    Country_Region %in% c("China", "Korea, South", "Japan", "Italy", "US")
  )
p <- ggplot(data_time, aes(Date, Confirmed, color = Country_Region)) +
  geom_point() +
  geom_line() +
  ggtitle("Confirmed COVID-19 Cases") +
  geom_point(aes(group = seq_along(Date))) +
  transition_reveal(Date)
# Some people needed to use this line instead
# animate(p,renderer = gifski_renderer(), end_pause = 15)
# NOTE(review): the file name says "deaths" but the plot shows Confirmed
# counts - confirm which is intended before renaming, since other documents
# may link to this path.
anim_save("deaths_5_countries.gif", p)

### Exercise 1

# Exercise 1: the US deaths plot with the dark theme, written to a PNG file.
# The pixel dimensions are the size in inches multiplied by the resolution.
ppi <- 300
# Create the output folder first: png() cannot open a file in a directory
# that does not exist.
dir.create("Images", showWarnings = FALSE)
png("Images/time_series_Mine_plot.png",
  width = 3 * ppi, height = 3 * ppi, res = ppi
)
# print() is explicit so the plot is drawn on the PNG device even when this
# script is run through source(), where ggplot objects are not auto-printed.
print(
  time_series_long_joined %>%
    # Filter first so only the US rows are aggregated
    filter(Country_Region == "US") %>%
    group_by(Country_Region, Date) %>%
    summarise(across(c(Confirmed, Deaths), sum), .groups = "drop") %>%
    ggplot(aes(x = Date, y = Deaths)) +
    geom_point() +
    geom_line() +
    ggtitle("US COVID-19 Deaths Dark") +
    theme_dark()
)
# Close the device so the file is flushed to disk
dev.off()
## png 
##   2

US COVID-19 Deaths Dark

### Exercise 2

# Exercise 2: interactive plot of confirmed US counts on a log2 scale.
# Counts are summed per country, report type and date from the long
# Report_Type / Counts table, then reduced to the US "Confirmed" series.
ggplotly(
  time_series_long_joined_counts %>%
    group_by(Country_Region, Report_Type, Date) %>%
    summarise(Counts = sum(Counts)) %>%
    filter(Country_Region == "US", Report_Type == "Confirmed") %>%
    ggplot(
      aes(Date, log2(Counts), fill = Report_Type, color = Report_Type)
    ) +
    geom_point() +
    geom_line() +
    ggtitle("Confirmed US COVID-19 Cases") +
    facet_wrap(~Country_Region, nrow = 5, scales = "free") +
    theme_dark()
)
## `summarise()` regrouping output by 'Country_Region', 'Report_Type' (override with `.groups` argument)
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

### Exercise 3

# Fetch the confirmed-case table from the CSSEGISandData GitHub repository and
# give the two slash-named columns syntactic names
time_series_confirmed <-
  read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv")) %>%
  rename(
    Province_State = `Province/State`,
    Country_Region = `Country/Region`
  )
## 
## -- Column specification --------------------------------------------------------
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## i Use `spec()` for the full column specifications.
# Load the confirmed-case table from the local copy under data/ and give the
# two slash-named columns syntactic names
time_series_confirmed <-
  read_csv("data/time_series_covid19_confirmed_global.csv") %>%
  rename(
    Province_State = `Province/State`,
    Country_Region = `Country/Region`
  )
## 
## -- Column specification --------------------------------------------------------
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## i Use `spec()` for the full column specifications.
# Reshape the confirmed table to long format: the location columns are kept,
# every other column name becomes a Date entry and its value becomes Confirmed
time_series_confirmed_long <- time_series_confirmed %>%
  pivot_longer(
    cols = -c(Province_State, Country_Region, Lat, Long),
    names_to = "Date",
    values_to = "Confirmed"
  )

# Load the deaths table from the local copy under data/ and give the two
# slash-named columns syntactic names
time_series_deaths <-
  read_csv("data/time_series_covid19_deaths_global.csv") %>%
  rename(
    Province_State = `Province/State`,
    Country_Region = `Country/Region`
  )
## 
## -- Column specification --------------------------------------------------------
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## i Use `spec()` for the full column specifications.
 # Reshape the deaths table to long format: the location columns are kept,
 # every other column name becomes a Date entry and its value becomes Deaths
 time_series_deaths_long <- time_series_deaths %>%
   pivot_longer(
     cols = -c(Province_State, Country_Region, Lat, Long),
     names_to = "Date",
     values_to = "Deaths"
   )
# Show the first rows of the reshaped table
head(time_series_deaths_long)
## # A tibble: 6 x 6
##   Province_State Country_Region   Lat  Long Date    Deaths
##   <chr>          <chr>          <dbl> <dbl> <chr>    <dbl>
## 1 <NA>           Afghanistan     33.9  67.7 1/22/20      0
## 2 <NA>           Afghanistan     33.9  67.7 1/23/20      0
## 3 <NA>           Afghanistan     33.9  67.7 1/24/20      0
## 4 <NA>           Afghanistan     33.9  67.7 1/25/20      0
## 5 <NA>           Afghanistan     33.9  67.7 1/26/20      0
## 6 <NA>           Afghanistan     33.9  67.7 1/27/20      0
# Add a join key made of province, country and date separated by ".";
# remove = FALSE keeps the three source columns in the table
time_series_confirmed_long <- time_series_confirmed_long %>%
  unite(
    col = "Key", Province_State, Country_Region, Date,
    sep = ".", remove = FALSE
  )
# Show the first rows, including the new Key column
head(time_series_confirmed_long)
## # A tibble: 6 x 7
##   Key                 Province_State Country_Region   Lat  Long Date   Confirmed
##   <chr>               <chr>          <chr>          <dbl> <dbl> <chr>      <dbl>
## 1 NA.Afghanistan.1/2~ <NA>           Afghanistan     33.9  67.7 1/22/~         0
## 2 NA.Afghanistan.1/2~ <NA>           Afghanistan     33.9  67.7 1/23/~         0
## 3 NA.Afghanistan.1/2~ <NA>           Afghanistan     33.9  67.7 1/24/~         0
## 4 NA.Afghanistan.1/2~ <NA>           Afghanistan     33.9  67.7 1/25/~         0
## 5 NA.Afghanistan.1/2~ <NA>           Afghanistan     33.9  67.7 1/26/~         0
## 6 NA.Afghanistan.1/2~ <NA>           Afghanistan     33.9  67.7 1/27/~         0
# Build the same province/country/date key on the deaths table and keep only
# the key and the count, so the join adds a single Deaths column
time_series_deaths_long <- time_series_deaths_long %>%
  unite(col = "Key", Province_State, Country_Region, Date, sep = ".") %>%
  select(Key, Deaths)

# Full join keeps keys present in either table; the helper Key column is
# dropped once the tables are combined
time_series_long_joined <- time_series_confirmed_long %>%
  full_join(time_series_deaths_long, by = "Key") %>%
  select(-Key)
# Show the first rows of the combined table
head(time_series_long_joined)
## # A tibble: 6 x 7
##   Province_State Country_Region   Lat  Long Date    Confirmed Deaths
##   <chr>          <chr>          <dbl> <dbl> <chr>       <dbl>  <dbl>
## 1 <NA>           Afghanistan     33.9  67.7 1/22/20         0      0
## 2 <NA>           Afghanistan     33.9  67.7 1/23/20         0      0
## 3 <NA>           Afghanistan     33.9  67.7 1/24/20         0      0
## 4 <NA>           Afghanistan     33.9  67.7 1/25/20         0      0
## 5 <NA>           Afghanistan     33.9  67.7 1/26/20         0      0
## 6 <NA>           Afghanistan     33.9  67.7 1/27/20         0      0
# Parse the month/day/year strings in Date into Date values
time_series_long_joined <- time_series_long_joined %>%
  mutate(Date = mdy(Date))

# Stack the two count columns (Confirmed, Deaths) into Report_Type / Counts
# pairs; these are the only columns besides location and date
time_series_long_joined_counts <- time_series_long_joined %>%
  pivot_longer(
    cols = c(Confirmed, Deaths),
    names_to = "Report_Type",
    values_to = "Counts"
  )
# Show the first rows of the report table
head(time_series_long_joined_counts)
## # A tibble: 6 x 7
##   Province_State Country_Region   Lat  Long Date       Report_Type Counts
##   <chr>          <chr>          <dbl> <dbl> <date>     <chr>        <dbl>
## 1 <NA>           Afghanistan     33.9  67.7 2020-01-22 Confirmed        0
## 2 <NA>           Afghanistan     33.9  67.7 2020-01-22 Deaths           0
## 3 <NA>           Afghanistan     33.9  67.7 2020-01-23 Confirmed        0
## 4 <NA>           Afghanistan     33.9  67.7 2020-01-23 Deaths           0
## 5 <NA>           Afghanistan     33.9  67.7 2020-01-24 Confirmed        0
## 6 <NA>           Afghanistan     33.9  67.7 2020-01-24 Deaths           0
# Exercise 3: animated plot of confirmed cases for ten selected countries.
# Per-country, per-date totals are computed first, then restricted to the
# chosen countries.
# The global time series names the country "United Kingdom"; the previous
# filter value "UK" matched no rows, so only nine countries were plotted.
data_time <- time_series_long_joined %>%
  group_by(Country_Region, Date) %>%
  summarise_at(c("Confirmed", "Deaths"), sum) %>%
  filter(
    Country_Region %in% c(
      "US", "Brazil", "India", "Mexico", "United Kingdom",
      "Italy", "France", "Iran", "Spain", "Russia"
    )
  )
# Confirmed cases over time, one colour per country. The second point layer
# puts every row in its own group (group = seq_along(Date)), and
# transition_reveal() animates the plot along Date.
p <- ggplot(data_time, aes(x = Date, y = Confirmed, color = Country_Region)) +
  geom_point() +
  geom_line() +
  ggtitle("Top 10 Confirmed COVID-19 Cases") +
  geom_point(aes(group = seq_along(Date))) +
  transition_reveal(Date)
# Some people needed to use this line instead
# animate(p,renderer = gifski_renderer(), end_pause = 15)
animate(p, end_pause = 15)